from time import sleep
import datetime
import re

import requests
from lxml import etree
from pymongo import MongoClient

from freeProxyWebs.proxyweb import ProxyWebs

class EightNineProxy(ProxyWebs):
    """Scraper for the 89ip free-proxy site (http://www.89ip.cn/).

    Crawls the paginated proxy listing, parses ip/port pairs from the
    HTML table into ``self._proxyList``, and persists them into the
    local MongoDB ``ipPool`` database (``eightnine`` collection).
    """

    def _get_web(self, url):
        """Fetch the raw HTML for *url* via the base-class downloader."""
        return super()._get_web(url)

    def _parser(self, html):
        """Parse one listing page and append its proxies to self._proxyList.

        Each ``<tr>`` of the ``layui-table`` yields one dict of the form
        ``{'ip': ..., 'port': ..., 'protocol': 'http'}``.
        """
        tree = etree.HTML(html)
        rows = tree.xpath('//table[@class="layui-table"]/tbody/tr')
        for row in rows:
            cells = row.xpath('td/text()')
            # First two columns are IP and port; trailing columns (location,
            # speed, ...) are ignored.
            self._proxyList.append({
                'ip': cells[0].strip(),
                'port': cells[1].strip(),
                'protocol': 'http',
            })

    def _crawl_pages(self, max_page, delay, verbose=False):
        """Crawl listing pages 1..max_page into self._proxyList.

        Page 1 lives at the bare base URL; page N (N>=2) is at
        ``self._url + f'{N}.html'``.  Sleeps *delay* seconds between
        requests to stay polite to the site.
        """
        for page in range(1, max_page + 1):
            url = self._url if page == 1 else self._url + f'{page}.html'
            self._parser(self._get_web(url))
            if verbose:
                print(url + f' 第{page}页完成')
            sleep(delay)

    def refresh_daily_proxy(self):
        """Refresh the daily proxies: crawl the first 10 listing pages.

        Fixes a bug where each page URL was appended to the *previous*
        page's URL instead of the base URL, so page 3 onward requested
        garbage like ``<base>2.html3.html``.
        """
        self._crawl_pages(max_page=10, delay=1)

    def init_proxy_pool(self):
        """Initialize the pool: crawl the first 20 pages, then rebuild
        the ``eightnine`` MongoDB collection from scratch."""
        self._crawl_pages(max_page=20, delay=0.5, verbose=True)
        client = MongoClient('mongodb://localhost:27017/')
        with client:
            db = client.ipPool
            try:
                # drop() is a no-op if the collection does not exist; the
                # guard only protects against a transient server error.
                db.eightnine.drop()
            except Exception:
                print('没有集合，直接创建')
            finally:
                # insert_many raises InvalidOperation on an empty list, so
                # only insert when the crawl actually produced proxies.
                if self._proxyList:
                    db.eightnine.insert_many(self._proxyList)
                print("89网集合初始化完成")

    def save_proxies(self):
        """Append the in-memory proxy list to the ``eightnine`` collection."""
        client = MongoClient('mongodb://localhost:27017/')
        with client:
            db = client.ipPool
            # Guard against an empty crawl result (insert_many raises on []).
            if self._proxyList:
                db.eightnine.insert_many(self._proxyList)
            print("89代理保存完成")